"""
# coding     : utf-8 
# Time       : 2025/6/12 9:29
# Author     : chenxianb
# version    : python 3.8.2
# Description: Fetch the Baidu real-time hot-search news list and save it to an Excel file
"""
import os.path
import time

import openpyxl
import requests
from bs4 import BeautifulSoup


def get_html_text(url):
    """Download a page and return its body as decoded text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body decoded with the encoding detected from its
        content, or an empty string when the request fails (connection
        error, timeout, invalid URL, or a 4xx/5xx status).
    """
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:139.0) Gecko/20100101 Firefox/139.0"}

    try:
        r = requests.get(url, headers=header, timeout=30)
        r.raise_for_status()
    except requests.RequestException as exc:
        # Only request-related failures are treated as "no content";
        # KeyboardInterrupt and programming errors are left to propagate.
        print(f"Failed to fetch {url}: {exc}")
        return ""

    # Use the encoding guessed from the body rather than the one declared
    # in the HTTP headers.
    r.encoding = r.apparent_encoding
    return r.text


def get_news(html):
    """Extract the hot-search entries from the board page markup.

    Args:
        html: HTML text of the page.

    Returns:
        A list of ``[title, url]`` pairs in document order, one for each
        ``<a>`` element with CSS class ``title_dIF3B`` that contains a
        ``<div>`` and carries an ``href`` attribute. Anchors missing
        either are skipped. An empty list is returned when nothing
        matches.
    """
    soup = BeautifulSoup(html, "html.parser")

    info = []
    for anchor in soup.find_all("a", class_="title_dIF3B"):
        title_div = anchor.div
        link = anchor.get("href")
        # A single malformed anchor should not abort the whole scrape.
        if title_div is None or link is None:
            continue
        info.append([title_div.text.strip(), link])

    return info


def save_news(info, path):
    """Write the entries to a timestamped ``.xlsx`` file inside ``path``.

    The file is named ``百度热搜<YYYYmmddHHMMSS>.xlsx`` using the local
    time, and contains a single sheet ``news`` with a header row followed
    by one row per entry. A confirmation message is printed after the
    file has been saved.

    Args:
        info: Iterable of rows (e.g. ``[title, url]`` lists) to append
            below the header.
        path: Directory in which the file is created; it is created
            first if it does not exist.
    """
    # makedirs copes with nested directories and with the directory
    # already existing, without a separate existence check.
    os.makedirs(path, exist_ok=True)

    # Year, month, day, hour, minute, second.
    timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
    file = os.path.join(path, f"百度热搜{timestamp}.xlsx")

    # The first positional parameter of Workbook is ``write_only``, not a
    # filename. Request write-only mode explicitly: it starts with no
    # default sheet, so "news" is the only sheet in the output.
    wb = openpyxl.Workbook(write_only=True)
    try:
        table = wb.create_sheet("news", 0)
        table.append(["名称", "链接"])
        for item in info:
            table.append(item)
        wb.save(file)
    finally:
        wb.close()

    print("保存成功")


if __name__ == '__main__':
    # Pipeline: download the real-time board page, pull out the
    # title/link pairs, then write them to a spreadsheet under ./news/.
    page = get_html_text("https://top.baidu.com/board?tab=realtime")
    save_news(get_news(page), "./news/")
